import xlrd
import os
import xlwt

from Bio import pairwise2
from docx import Document
from docx.shared import RGBColor

# NOTE: a `global` statement at module scope has no effect. These lines only
# advertise the module-level constants that are assigned near the end of this
# file and read by main() and Color_Wheel.
global COLOR_WHEEL
global PREFIX_ATG
global PREFIX_NOT_ATG
global SUFFIX

def _read_path(prompt):
    """Prompt the user and return the typed path as plain text.

    Python 2's ``input`` evaluates whatever is typed as an expression, so
    ``raw_input`` is used when it exists. Surrounding quotes are stripped so
    that paths typed as quoted string literals (required by the evaluating
    ``input``) keep working.
    """
    try:
        reader = raw_input
    except NameError:  # Python 3: input() already returns the raw text
        reader = input
    return reader(prompt).strip().strip('\'"')


def _best_local_alignment(query, target):
    """Return the first local alignment of query against target, or None.

    Scoring passed to pairwise2: +1 match, -1 mismatch, -3 gap open,
    -1 gap extend. The returned alignment is indexed as
    [0] gapped query, [1] gapped target, [2] score, [3] begin, [4] end.
    """
    alignments = pairwise2.align.localms(query, target, 1, -1, -3, -1)
    if alignments:
        return alignments[0]
    return None


def _render_aligned_region(paragraph, alignment, start, observed_length,
                           match_color):
    """Write one aligned region into the Word paragraph and count mutations.

    Walks the gapped expected sequence from `start` to the alignment end.
    Matching bases are written in `match_color`, observed bases opposite a gap
    in the expected sequence (insertions) in red, substituted bases in the
    default colour, and bases missing from the observation as '_'.

    Returns a tuple (observed_text, base_index, point, insertions, deletions)
    where observed_text is the observed bases that were written (no '_'
    placeholders) and base_index is the position at which the caller should
    resume in the observed sequence.
    """
    expected_aligned = alignment[0]
    observed_aligned = alignment[1]
    written = []
    point = insertions = deletions = 0
    base_index = start

    for base in expected_aligned[start:alignment[4]]:
        if base_index < observed_length:
            observed_base = observed_aligned[base_index]
            if base == observed_base:
                run = paragraph.add_run(base)
                run.font.color.rgb = match_color
                written.append(observed_base)
            elif observed_base != '-':
                run = paragraph.add_run(observed_base)
                if expected_aligned[base_index] == '-':
                    run.font.color.rgb = RGBColor(222, 0, 20)
                    insertions += 1
                else:
                    point += 1
                written.append(observed_base)
            else:
                paragraph.add_run('_')
                deletions += 1
        base_index += 1

    # The gapped observed string is longer than the raw one by the number of
    # gap characters; step back by that amount to return to raw coordinates.
    base_index -= (len(observed_aligned) - observed_length)
    return ''.join(written), base_index, point, insertions, deletions


def _describe_mutations(label, point, insertions, deletions):
    """Return e.g. 'prefix altered (2 point mutations, 1 deletions), '."""
    parts = []
    if point > 0:
        parts.append(str(point) + ' point mutations')
    if insertions > 0:
        parts.append(str(insertions) + ' insertions')
    if deletions > 0:
        parts.append(str(deletions) + ' deletions')
    return label + ' altered (' + ', '.join(parts) + '), '


def main():
    """Compare expected constructs against observed sequencing reads.

    Asks for a spreadsheet of expected sequences (one construct per row, the
    first row is skipped) and a folder of text files holding the observed
    reads. For every construct a colour-coded .docx is written and one row is
    added to a summary workbook; both go into '<folder name> Results' in the
    current working directory.
    """
    print('extracting information')
    # extract information from files on expected and observed sequences
    expected_spreadsheet_path = _read_path(
        'Path to spreadsheet with expeted sequences: ')
    observed_folder_path = _read_path(
        'Path to folder with text files containing observed sequences: ')

    book = xlrd.open_workbook(expected_spreadsheet_path)
    sheet = book.sheet_by_index(0)
    book_name = os.path.basename(expected_spreadsheet_path).split('.')[0]

    # Expected_Sequences holds rows from input spreadsheet
    # (row 0 is skipped, presumably a header row)
    Expected_Sequences = [Expect_Sequence(sheet.row_values(row))
                          for row in range(1, sheet.nrows)]

    # Observed_Sequences associates the contents of files with observed
    # sequences with the name of the file
    Observed_Sequences = {}
    for observed_file in os.listdir(observed_folder_path):
        observed_file_path = os.path.join(observed_folder_path, observed_file)
        if not os.path.isfile(observed_file_path):
            continue  # sub-directories cannot be read as sequence files
        with open(observed_file_path, 'r') as f:
            # split() drops every kind of whitespace, so '\r' from Windows
            # line endings is not mistaken for a base
            Observed_Sequences[observed_file] = \
                ''.join(f.read().split()).upper()

    print('comparing values')

    output_path = \
        os.path.basename(os.path.normpath(observed_folder_path)) + ' Results'
    if not os.path.exists(output_path):
        os.makedirs(output_path)

    book = xlwt.Workbook()
    sh = book.add_sheet('Results')
    # xlwt writes the legacy binary format, so the file must be named .xls
    summary_path = os.path.join(output_path,
                                book_name + ' result summary.xls')
    row_index = 0
    green = RGBColor(0, 255, 0)

    # For each expected sequence, compares with observed sequence
    for Expected_Sequence in Expected_Sequences:

        # Claim the summary row before doing any work: xlwt refuses to
        # overwrite a cell, so a row left half-written by a failure must not
        # be reused by the next sequence.
        summary_row = row_index
        row_index += 1
        sh.write(summary_row, 0, Expected_Sequence.Name)
        status_written = False

        try:
            # initialize document
            document = Document()
            document.add_heading(Expected_Sequence.Name, level=0)
            document.add_heading('Sequence File:', level=1)
            document.add_paragraph(Expected_Sequence.Observed_File)
            document.add_heading('Backbone:', level=1)
            document.add_paragraph(Expected_Sequence.Backbone)
            document.add_heading('Inserts:', level=1)

            for i, insert in enumerate(Expected_Sequence.Expected_Inserts, 1):
                document.add_paragraph('Insert ' + str(i) + ':')
                document.add_paragraph(insert.Name)
                run = document.add_paragraph('').add_run(insert.Sequence)
                run.font.color.rgb = insert.Color

            document.add_heading('Observed Sequence', level=1)
            paragraph = document.add_paragraph('')

            print('comparing ' + Expected_Sequence.Name)

            Observed_Sequence = \
                Observed_Sequences[Expected_Sequence.Observed_File]

            Prefix_Present = False
            Suffix_Present = False
            Insert_Present = False
            Prefix_Adjacent = False
            Suffix_Adjacent = False
            Short_Sequence = False
            # a read consisting only of 'N' carries no base calls at all
            Bad_Sequence = (set(Observed_Sequence) == set(['N']))

            # observed bases written so far; compared with the input at the
            # end as a self-check of the rendering
            Sequence_Check = ''

            pref_point_mut = 0
            pref_ins_mut = 0
            pref_del_mut = 0

            ins_point_mut = 0
            ins_ins_mut = 0
            ins_del_mut = 0
            ins_sec_miss = False

            suf_point_mut = 0
            suf_ins_mut = 0
            suf_del_mut = 0

            base_index = 0

            # reverses sequence if reversed
            if Expected_Sequence.Direction.startswith('R'):
                Observed_Sequence = Rev_Comp(Observed_Sequence)
            observed_length = len(Observed_Sequence)

            # searches for prefix before expected sequence
            print('finding prefix')
            if Expected_Sequence.Expected_Inserts[0].Sequence[:3] == "ATG":
                prefix = PREFIX_ATG
            else:
                prefix = PREFIX_NOT_ATG
            Prefix_Alignment = _best_local_alignment(prefix, Observed_Sequence)
            if Prefix_Alignment and Prefix_Alignment[2] < 18:
                Prefix_Alignment = None

            Last_Prefix_Base = None
            Last_Insert_Base = None

            if Prefix_Alignment:
                Prefix_Present = True
                Last_Prefix_Base = Prefix_Alignment[4]
                Before_Prefix = Observed_Sequence[:Prefix_Alignment[3]]
                paragraph.add_run(Before_Prefix)
                Sequence_Check = Sequence_Check + Before_Prefix

                written, base_index, point, inserted, deleted = \
                    _render_aligned_region(
                        paragraph, Prefix_Alignment, Prefix_Alignment[3],
                        observed_length, green)
                Sequence_Check = Sequence_Check + written
                pref_point_mut += point
                pref_ins_mut += inserted
                pref_del_mut += deleted

            print('aligning inserts')

            # searches individually for parts of expected sequence
            # in order given in input spreadsheet
            for Expect_Insert in Expected_Sequence.Expected_Inserts:

                Insert = Expect_Insert.Sequence

                if observed_length < len(Insert):
                    Short_Sequence = True

                # Each insert is aligned in consecutive 80-base fragments.
                # Stepping through range() never yields an empty trailing
                # fragment when the length is an exact multiple of 80.
                fragments = [Insert[start:start + 80]
                             for start in range(0, len(Insert), 80)]
                last_fragment = len(fragments) - 1

                for y, fragment in enumerate(fragments):

                    frag_alignment = _best_local_alignment(
                        fragment, Observed_Sequence)
                    if frag_alignment and \
                            frag_alignment[2] < (len(fragment) * .6) and \
                            frag_alignment[2] < 40:
                        frag_alignment = None

                    if not frag_alignment:
                        ins_sec_miss = True
                        continue

                    Insert_Present = True

                    if y == 0 and Last_Prefix_Base:
                        if frag_alignment[3] - Last_Prefix_Base <= 5:
                            Prefix_Adjacent = True

                    if y == last_fragment:
                        Last_Insert_Base = frag_alignment[4]

                    render_from = None
                    if frag_alignment[3] >= base_index:
                        # unaligned bases between the previous region and
                        # this fragment are copied through uncoloured
                        Before_Insert = \
                            Observed_Sequence[base_index:frag_alignment[3]]
                        paragraph.add_run(Before_Insert)
                        Sequence_Check = Sequence_Check + Before_Insert
                        render_from = frag_alignment[3]
                    elif frag_alignment[3] < base_index < frag_alignment[4]:
                        # fragment overlaps what was already written: only
                        # render the part that has not been written yet
                        render_from = base_index

                    if render_from is not None:
                        written, base_index, point, inserted, deleted = \
                            _render_aligned_region(
                                paragraph, frag_alignment, render_from,
                                observed_length, Expect_Insert.Color)
                        Sequence_Check = Sequence_Check + written
                        ins_point_mut += point
                        ins_ins_mut += inserted
                        ins_del_mut += deleted

                    if frag_alignment[4] >= observed_length:
                        # the read ends inside this insert; later fragments
                        # of it cannot be present
                        base_index += \
                            (len(frag_alignment[1]) - observed_length)
                        break

            print('finding suffix')

            # searches for suffix after last section of expected sequence
            Suffix_Alignment = None
            if not Short_Sequence:
                Suffix_Alignment = _best_local_alignment(
                    SUFFIX, Observed_Sequence)
                if Suffix_Alignment and Suffix_Alignment[2] < 18:
                    Suffix_Alignment = None

            if Suffix_Alignment:

                Suffix_Present = True

                if Last_Insert_Base:
                    if (Suffix_Alignment[3] - Last_Insert_Base) <= 5:
                        Suffix_Adjacent = True

                render_from = None
                if Suffix_Alignment[3] >= base_index:
                    Before_Suffix = \
                        Observed_Sequence[base_index:Suffix_Alignment[3]]
                    paragraph.add_run(Before_Suffix)
                    Sequence_Check = Sequence_Check + Before_Suffix
                    render_from = Suffix_Alignment[3]
                elif Suffix_Alignment[3] < base_index < Suffix_Alignment[4]:
                    render_from = base_index

                if render_from is not None:
                    written, base_index, point, inserted, deleted = \
                        _render_aligned_region(
                            paragraph, Suffix_Alignment, render_from,
                            observed_length, green)
                    Sequence_Check = Sequence_Check + written
                    suf_point_mut += point
                    suf_ins_mut += inserted
                    suf_del_mut += deleted

            paragraph.add_run(Observed_Sequence[base_index:])

            # keeps track of number and types of mutations
            # puts info in summary spreadsheet
            Prefix_Perfect = \
                (pref_del_mut + pref_point_mut + pref_ins_mut == 0)
            # an insert with a fragment that could not be located is not
            # perfect even if every located fragment matched exactly
            Insert_Perfect = \
                (ins_del_mut + ins_point_mut + ins_ins_mut == 0) and \
                not ins_sec_miss
            Suffix_Perfect = \
                (suf_del_mut + suf_point_mut + suf_ins_mut == 0)

            Sequence_Perfect = \
                Prefix_Perfect and Insert_Perfect and Suffix_Perfect and \
                Prefix_Adjacent and Suffix_Adjacent

            if Sequence_Perfect:
                status = 'sequence perfect'

            elif Bad_Sequence:
                status = 'bad sequence'

            else:
                result = ''

                if not Prefix_Present:
                    result = result + 'prefix not found, '
                if Prefix_Present and not Prefix_Adjacent and Insert_Present:
                    result = result + 'prefix not adjacent to insert, '
                if Prefix_Present and not Prefix_Perfect:
                    result = result + _describe_mutations(
                        'prefix', pref_point_mut, pref_ins_mut, pref_del_mut)

                if not Suffix_Present:
                    result = result + 'suffix not found, '
                if Suffix_Present and not Suffix_Adjacent and Insert_Present:
                    result = result + 'suffix not adjacent to insert, '
                if Suffix_Present and not Suffix_Perfect:
                    result = result + _describe_mutations(
                        'suffix', suf_point_mut, suf_ins_mut, suf_del_mut)

                if not Insert_Present:
                    result = result + 'insert not found, '

                if Insert_Present and not Insert_Perfect:
                    if ins_sec_miss:
                        result = result + 'could not find section of insert, '
                    else:
                        result = result + _describe_mutations(
                            'insert', ins_point_mut, ins_ins_mut, ins_del_mut)

                if Short_Sequence:
                    result = 'sequence short, ' + result

                if result:
                    status = result[:-2]  # drop the trailing ', '
                else:
                    status = 'anomalous result'

            sh.write(summary_row, 1, status)
            status_written = True

            Sequence_Check = Sequence_Check + Observed_Sequence[base_index:]

            if Sequence_Check == Observed_Sequence:
                print("printed sequence correct")
            else:
                print("printed sequence altered. check against original sequence")
                print(Sequence_Check)

            document.save(
                os.path.join(output_path, Expected_Sequence.Name + '.docx'))

        except Exception as error:
            # One malformed row or missing file must not stop the batch, but
            # the reason is reported instead of being silently discarded.
            print('Problem Encountered. Skipping to Next Sequence.')
            print(error)
            if not status_written:
                sh.write(summary_row, 1,
                         'problem encountered (' + repr(error) + ')')

        # Saved after every sequence, outside the try, so partial results and
        # the rows of skipped sequences are persisted as well.
        book.save(summary_path)
            
            
class Expect_Sequence:
    """Holds information from row in input spreadsheet.

    Attributes set from `row_values`:
      Name             -- cell 0, kept as read
      Observed_File    -- cell 1 as a string
      Direction        -- cell 2 as a string, spaces stripped, upper-cased
      Backbone         -- cell 3, kept as read
      Expected_Inserts -- list of Expected_Insert built from cells 4 onward,
                          read as alternating (name, sequence) pairs
      Colors           -- the Color_Wheel that assigned each insert's colour

    Raises IndexError when an insert name has no sequence cell after it.
    """
    def __init__(self, row_values):
        self.Name = row_values[0]
        self.Observed_File = str(row_values[1])
        self.Direction = str(row_values[2]).strip(' ').upper()
        self.Backbone = row_values[3]
        self.Colors = Color_Wheel()

        # Newlines embedded in a cell are removed and the text upper-cased;
        # a new list is built so tuples are accepted as well as lists.
        cells = [''.join(str(cell).split('\n')).upper()
                 for cell in row_values[4:]]
        # Empty cells are dropped before pairing.
        cells = [cell for cell in cells if cell != '']

        insert_names = cells[::2]
        insert_sequences = cells[1::2]
        if len(insert_names) != len(insert_sequences):
            raise IndexError(
                'insert name %r in row %r has no sequence after it'
                % (insert_names[-1], self.Name))

        # Colours are handed out in spreadsheet order, one per insert.
        self.Expected_Inserts = [
            Expected_Insert(name, sequence, self.Colors.Color())
            for name, sequence in zip(insert_names, insert_sequences)]


class Expected_Insert:
    """One named part of an expected sequence plus its display colour."""
    def __init__(self, Name, Sequence, Color):
        # Name: label of the part; Sequence: its bases;
        # Color: colour used when the part is written to the Word report.
        self.Name, self.Sequence, self.Color = Name, Sequence, Color

class Color_Wheel:
    """Holds and returns color assignments for Word.

    `wheel` is the sequence of colours to cycle through; when omitted the
    module-level COLOR_WHEEL is used.
    """
    def __init__(self, wheel=None):
        self.Wheel = COLOR_WHEEL if wheel is None else wheel
        self.Pos = -1  # index of the colour handed out last; -1 = none yet

    def Color(self):
        """Return the next colour, wrapping around at the end of the wheel."""
        self.Pos += 1
        # Wrap on the actual wheel length rather than a fixed 6 so wheels of
        # any size cycle correctly.
        return self.Wheel[self.Pos % len(self.Wheel)]

#reverse complement
def Rev_Comp(seq):
    """Return the reverse complement of `seq`, upper-cased.

    A/T and C/G are swapped; any other character (for example 'N') is kept
    unchanged in its reversed position.
    """
    complement = {'A': 'T', 'C': 'G', 'T': 'A', 'G': 'C'}
    # join() builds the result in one pass instead of re-copying the string
    # for every appended base.
    return ''.join(complement.get(base, base) for base in seq.upper()[::-1])


# Colours assigned in turn to the inserts of a construct in the Word report.
yellow = RGBColor(255,225,45)
pink = RGBColor(235,50,110)
sky_blue = RGBColor(5,115,170)
ochre = RGBColor(250,155,55)
pale_orange = RGBColor(255,185,135)
magenta = RGBColor(160,50,115)

COLOR_WHEEL = [yellow,pink,sky_blue,ochre,pale_orange,magenta]

# Flanking sequences searched for around the inserts. main() uses PREFIX_ATG
# when the first insert starts with ATG and PREFIX_NOT_ATG otherwise.
# NOTE(review): these look like the BioBrick standard prefix/suffix - confirm.
PREFIX_ATG = 'GAATTCGCGGCCGCTTCTAG'
PREFIX_NOT_ATG = 'GAATTCGCGGCCGCTTCTAGAG'
SUFFIX = 'TACTAGTAGCGGCCGCTGCAG'


# Run the interactive comparison only when executed as a script, so that
# importing this module (e.g. to reuse Rev_Comp) has no side effects.
if __name__ == '__main__':
    main()